# ==============================================================================
# file name: SI-sample-attrition.R
# authors: Bernhard Clemm 
# date: March 17, 2022
# purpose: Describe sample attrition
# ==============================================================================

# SETUP ========================================================================

# Packages: tidyverse supplies the pipe and the dplyr verbs used below,
# kableExtra supplies kable() and collapse_rows() for the LaTeX tables.
library(tidyverse)
library(kableExtra)

# Project root: two directory levels above the document that is currently open
# in the RStudio source editor (so this only resolves when run from RStudio).
# All directory variables keep a trailing "/" so they can be pasted directly
# in front of relative paths.
basedir <- rstudioapi::getSourceEditorContext()$path %>%
  dirname() %>%
  dirname() %>%
  paste0("/")
codedir <- paste0(basedir, "code/")
datadir <- paste0(basedir, "data/")

# Wide-format data set used by both country sections below.
data_wide <- datadir %>%
  paste0("processed/data_wide.csv") %>%
  read.csv()

# US =================================================================

# Both country sections build the same kind of table: for every demographic
# variable, the percentage distribution of its categories at each stage of the
# study, plus the p-value of a chi-squared test comparing the stages. The
# helpers below do this once so the two sections cannot drift apart.

# Columns of data_wide to describe, named by the label shown in the table.
demographic_vars <- c(
  Gender = "gender_cat",
  Age = "age_cat",
  Education = "edu_cat"
)

# Count the categories of one column at every stage.
#
# stages: named list of data frames, one per stage (names become row names).
# column: name of the categorical column to tabulate.
#
# Returns a stage-by-category matrix of counts (missing values are not
# counted). The categories are the union over all stages, so every row has the
# same columns in the same order even when a category is absent from a stage;
# row-binding the raw table() results would silently misalign in that case.
stage_counts <- function(stages, column) {
  categories <- sort(unique(unlist(
    lapply(stages, function(stage) stage[[column]]),
    use.names = FALSE
  )))
  if (length(categories) == 0) {
    stop("No non-missing values found in column '", column, "'.",
         call. = FALSE)
  }
  counts <- do.call(rbind, lapply(stages, function(stage) {
    as.vector(table(factor(stage[[column]], levels = categories)))
  }))
  dimnames(counts) <- list(names(stages), as.character(categories))
  counts
}

# Build the descriptive part of an attrition table.
#
# stages: named list of data frames, one per stage.
#
# Returns a data frame with one row per demographic category and the columns
# `variable`, `value`, followed by one column per stage holding the share of
# that category (in percent of non-missing cases, rounded to 2 digits). Rows
# are identified by variable AND category, so identical category labels in two
# different variables cannot be mixed up. A category that does not occur at a
# stage gets a share of 0.
attrition_shares <- function(stages) {
  per_variable <- lapply(names(demographic_vars), function(label) {
    counts <- stage_counts(stages, demographic_vars[[label]])
    # Row-wise proportions, then transpose to categories (rows) x stages.
    shares <- t(round(prop.table(counts, margin = 1) * 100, 2))
    rownames(shares) <- NULL
    data.frame(variable = label, value = colnames(counts), shares)
  })
  result <- do.call(rbind, per_variable)
  rownames(result) <- NULL
  result
}

# P-value (rounded to 3 digits) of a chi-squared test of whether the
# distribution of `column` differs between the stages.
# NOTE(review): the stages look like overlapping subsets of the same
# respondents, which the test's independence assumption does not cover --
# confirm that this comparison is intended.
attrition_p_value <- function(stages, column) {
  round(chisq.test(stage_counts(stages, column))$p.value, 3)
}

# Samples at each stage (US): started, invited, consented, post-wave.
data_wide_us_w3 <- data_wide %>%
  filter(country == "US" & !is.na(ResponseId_w3))
data_wide_us_invited <- data_wide %>%
  filter(country == "US" & !is.na(invited))
data_wide_us_consented <- data_wide %>%
  filter(country == "US" & !is.na(consented))
data_wide_us_post <- data_wide %>%
  filter(country == "US" & !is.na(respondent_id_post))

us_stages <- list(
  started = data_wide_us_w3,
  invited = data_wide_us_invited,
  consented = data_wide_us_consented,
  post = data_wide_us_post
)

attrition_us <- attrition_shares(us_stages)

# Add significance tests
p_gender_us <- attrition_p_value(us_stages, "gender_cat")
p_age_us <- attrition_p_value(us_stages, "age_cat")
p_edu_us <- attrition_p_value(us_stages, "edu_cat")

# Repeat each variable's p-value on all of its rows; the repeated cells are
# merged by collapse_rows() when the table is rendered.
attrition_us <- attrition_us %>%
  mutate(sig = case_when(
    variable == "Gender" ~ p_gender_us,
    variable == "Age" ~ p_age_us,
    variable == "Education" ~ p_edu_us))

kable(attrition_us,
      caption = "Attrition by demographics (US)",
      format = "latex", booktabs = TRUE, escape = FALSE, linesep = "",
      row.names = FALSE,
      col.names = c("", "", "Pre-Wave (started)", "Pre-Wave (invited to exp.)",
                    "Pre-Wave (accepted)", "Post-Wave",
                    "p-value (difference)")) %>%
  collapse_rows(columns = 1, valign = "middle", latex_hline = "major") %>%
  collapse_rows(columns = 7, valign = "middle", latex_hline = "major")

# Poland ======================================================================

# Samples at each stage (PL): started, consented, post-wave.
data_wide_pl_w2 <- data_wide %>%
  filter(country == "PL" & !is.na(respondent_id_w2))
data_wide_pl_consented <- data_wide %>%
  filter(country == "PL" & !is.na(consented))
data_wide_pl_post <- data_wide %>%
  filter(country == "PL" & !is.na(respondent_id_post))

pl_stages <- list(
  started = data_wide_pl_w2,
  consented = data_wide_pl_consented,
  post = data_wide_pl_post
)

attrition_pl <- attrition_shares(pl_stages)

# Add significance tests
p_gender_pl <- attrition_p_value(pl_stages, "gender_cat")
p_age_pl <- attrition_p_value(pl_stages, "age_cat")
p_edu_pl <- attrition_p_value(pl_stages, "edu_cat")

attrition_pl <- attrition_pl %>%
  mutate(sig = case_when(
    variable == "Gender" ~ p_gender_pl,
    variable == "Age" ~ p_age_pl,
    variable == "Education" ~ p_edu_pl))

kable(attrition_pl,
      caption = "Attrition by demographics (PL)",
      format = "latex", booktabs = TRUE, escape = FALSE, linesep = "",
      row.names = FALSE,
      col.names = c("", "", "Pre-Wave (started)",
                    "Pre-Wave (accepted)", "Post-Wave",
                    "p-value (difference)")) %>%
  collapse_rows(columns = 1, valign = "middle", latex_hline = "major") %>%
  collapse_rows(columns = 6, valign = "middle", latex_hline = "major")
